Loading libraries

dyn.load("/home/moyra.lawrence/miniconda3/envs/monocle3/lib/libgeos_c.so.1")
library(Seurat)
## Warning in fun(libname, pkgname): rgeos: versions of GEOS runtime 3.11.0-CAPI-1.17.0
## and GEOS at installation 3.8.0-CAPI-1.13.1differ
library(patchwork)
library(ggplot2)
library(gridExtra)
library(openxlsx)
library(svglite)
library(dplyr)
library(gt)
library(tidyverse)
library(stringi)
library(SeuratObject)

Loading embryo data and preparing for seurat

raw_data <- data.table::fread("/home/moyra.lawrence/R_analysis/Second_10x_run/GSE45719_matrix.txt.gz", data.table = FALSE)

names(raw_data)[1] <- 'Gene'
names <- make.unique(raw_data$Gene)
rownames(raw_data) <- names
raw_data <- raw_data[,-1] # get rid of old names

colnames(raw_data) <- gsub("_", "-", colnames(raw_data))
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "_", replacement = "-")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy1", replacement = "zygote-1")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy2", replacement = "zygote-2")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy3", replacement = "zygote-3")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy4", replacement = "zygote-4")

ML11 <- CreateSeuratObject(counts=raw_data, project="ML11", names.delim="-")
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
ML11$orig.ident
##            16cell-1-10            16cell-1-11            16cell-1-12 
##                 16cell                 16cell                 16cell 
##            16cell-1-13            16cell-1-14            16cell-1-15 
##                 16cell                 16cell                 16cell 
##             16cell-1-2             16cell-1-3             16cell-1-4 
##                 16cell                 16cell                 16cell 
##             16cell-1-5             16cell-1-6             16cell-1-7 
##                 16cell                 16cell                 16cell 
##             16cell-1-8             16cell-1-9             16cell-4-1 
##                 16cell                 16cell                 16cell 
##            16cell-4-10            16cell-4-11             16cell-4-2 
##                 16cell                 16cell                 16cell 
##             16cell-4-3             16cell-4-4             16cell-4-5 
##                 16cell                 16cell                 16cell 
##             16cell-4-6             16cell-4-7             16cell-4-8 
##                 16cell                 16cell                 16cell 
##             16cell-4-9             16cell-5-1            16cell-5-10 
##                 16cell                 16cell                 16cell 
##            16cell-5-11            16cell-5-12            16cell-5-13 
##                 16cell                 16cell                 16cell 
##             16cell-5-2             16cell-5-3             16cell-5-4 
##                 16cell                 16cell                 16cell 
##             16cell-5-5             16cell-5-6             16cell-5-7 
##                 16cell                 16cell                 16cell 
##             16cell-5-8             16cell-5-9             16cell-6-1 
##                 16cell                 16cell                 16cell 
##            16cell-6-10            16cell-6-11            16cell-6-12 
##                 16cell                 16cell                 16cell 
##             16cell-6-2             16cell-6-3             16cell-6-4 
##                 16cell                 16cell                 16cell 
##             16cell-6-5             16cell-6-6             16cell-6-7 
##                 16cell                 16cell                 16cell 
##             16cell-6-8             16cell-6-9              4cell-1-1 
##                 16cell                 16cell                  4cell 
##              4cell-1-2              4cell-1-4              4cell-2-1 
##                  4cell                  4cell                  4cell 
##              4cell-2-2              4cell-2-3              4cell-2-4 
##                  4cell                  4cell                  4cell 
##              4cell-3-1              4cell-3-3              4cell-3-4 
##                  4cell                  4cell                  4cell 
##              4cell-4-1              4cell-4-2              4cell-4-3 
##                  4cell                  4cell                  4cell 
##              4cell-4-4              8cell-1-1              8cell-1-2 
##                  4cell                  8cell                  8cell 
##              8cell-1-4              8cell-1-5              8cell-1-6 
##                  8cell                  8cell                  8cell 
##              8cell-1-7              8cell-1-8              8cell-2-1 
##                  8cell                  8cell                  8cell 
##              8cell-2-2              8cell-2-3              8cell-2-4 
##                  8cell                  8cell                  8cell 
##              8cell-2-6              8cell-2-7              8cell-2-8 
##                  8cell                  8cell                  8cell 
##              8cell-5-1              8cell-5-2              8cell-5-3 
##                  8cell                  8cell                  8cell 
##              8cell-5-4              8cell-5-6              8cell-5-7 
##                  8cell                  8cell                  8cell 
##              8cell-5-8              8cell-8-1              8cell-8-2 
##                  8cell                  8cell                  8cell 
##              8cell-8-3              8cell-8-4              8cell-8-6 
##                  8cell                  8cell                  8cell 
##              8cell-8-7              8cell-8-8  BXC-100pg-liver-RNA-1 
##                  8cell                  8cell                    BXC 
##  BXC-100pg-liver-RNA-2   BXC-10pg-liver-RNA-1   BXC-10pg-liver-RNA-2 
##                    BXC                    BXC                    BXC 
##   BXC-1ng-liver-RNA-0r    BXC-1ng-liver-RNA-1  BXC-30pg-liver-RNA-0r 
##                    BXC                    BXC                    BXC 
##   BXC-30pg-liver-RNA-2       BXC-liver-cell-1       BXC-liver-cell-2 
##                    BXC                    BXC                    BXC 
##       BXC-liver-cell-4       BXC-liver-cell-5       BXC-liver-cell-6 
##                    BXC                    BXC                    BXC 
##        C57twocell-16-1        C57twocell-16-2        C57twocell-18-1 
##             C57twocell             C57twocell             C57twocell 
##        C57twocell-18-2        C57twocell-20-1        C57twocell-20-2 
##             C57twocell             C57twocell             C57twocell 
##        C57twocell-22-1        C57twocell-22-2        early2cell-0r-1 
##             C57twocell             C57twocell             early2cell 
##        early2cell-0r-2         early2cell-1-1         early2cell-1-2 
##             early2cell             early2cell             early2cell 
##         early2cell-2-1         early2cell-2-2         early2cell-3-1 
##             early2cell             early2cell             early2cell 
##         early2cell-3-2         earlyblast-2-1        earlyblast-2-10 
##             early2cell             earlyblast             earlyblast 
##        earlyblast-2-12        earlyblast-2-15        earlyblast-2-16 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-2-17        earlyblast-2-19         earlyblast-2-2 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-2-22         earlyblast-2-3         earlyblast-2-4 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-2-5         earlyblast-2-6         earlyblast-2-7 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-2-8         earlyblast-3-1        earlyblast-3-10 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-3-12        earlyblast-3-13        earlyblast-3-14 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-3-15        earlyblast-3-16        earlyblast-3-17 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-3-2         earlyblast-3-3         earlyblast-3-4 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-3-6         earlyblast-3-7         earlyblast-3-8 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-3-9         earlyblast-4-1        earlyblast-4-12 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-4-13        earlyblast-4-14        earlyblast-4-16 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-4-17        earlyblast-4-18         earlyblast-4-2 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-4-3         earlyblast-4-5         earlyblast-4-6 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-4-8         earlyblast-4-9          late2cell-5-1 
##             earlyblast             earlyblast              late2cell 
##          late2cell-5-2          late2cell-6-1          late2cell-6-2 
##              late2cell              late2cell              late2cell 
##          late2cell-7-1          late2cell-7-2          late2cell-8-1 
##              late2cell              late2cell              late2cell 
##          late2cell-8-2          late2cell-9-1          late2cell-9-2 
##              late2cell              late2cell              late2cell 
##         lateblast-1-10         lateblast-1-11         lateblast-1-13 
##              lateblast              lateblast              lateblast 
##         lateblast-1-14         lateblast-1-16         lateblast-1-19 
##              lateblast              lateblast              lateblast 
##          lateblast-1-2         lateblast-1-20         lateblast-1-21 
##              lateblast              lateblast              lateblast 
##         lateblast-1-23         lateblast-1-24         lateblast-1-26 
##              lateblast              lateblast              lateblast 
##         lateblast-1-27          lateblast-1-4          lateblast-1-5 
##              lateblast              lateblast              lateblast 
##          lateblast-1-6          lateblast-1-7          lateblast-1-8 
##              lateblast              lateblast              lateblast 
##          lateblast-1-9          lateblast-2-1         lateblast-2-12 
##              lateblast              lateblast              lateblast 
##         lateblast-2-14         lateblast-2-16         lateblast-2-17 
##              lateblast              lateblast              lateblast 
##          lateblast-2-2          lateblast-2-3          lateblast-2-5 
##              lateblast              lateblast              lateblast 
##          lateblast-2-7          lateblast-2-8          lateblast-2-9 
##              lateblast              lateblast              lateblast 
##          mid2cell-0r-1          mid2cell-0r-2           mid2cell-3-1 
##               mid2cell               mid2cell               mid2cell 
##           mid2cell-3-2           mid2cell-4-1           mid2cell-4-2 
##               mid2cell               mid2cell               mid2cell 
##           mid2cell-5-1           mid2cell-5-2           mid2cell-6-1 
##               mid2cell               mid2cell               mid2cell 
##           mid2cell-6-2           mid2cell-7-1           mid2cell-7-2 
##               mid2cell               mid2cell               mid2cell 
##           midblast-1-1          midblast-1-10          midblast-1-11 
##               midblast               midblast               midblast 
##          midblast-1-12          midblast-1-13          midblast-1-14 
##               midblast               midblast               midblast 
##          midblast-1-15          midblast-1-16          midblast-1-17 
##               midblast               midblast               midblast 
##          midblast-1-18          midblast-1-19           midblast-1-2 
##               midblast               midblast               midblast 
##          midblast-1-20          midblast-1-22          midblast-1-23 
##               midblast               midblast               midblast 
##          midblast-1-24           midblast-1-3           midblast-1-4 
##               midblast               midblast               midblast 
##           midblast-1-5           midblast-1-6           midblast-1-8 
##               midblast               midblast               midblast 
##           midblast-1-9           midblast-2-1          midblast-2-10 
##               midblast               midblast               midblast 
##          midblast-2-11          midblast-2-12          midblast-2-13 
##               midblast               midblast               midblast 
##          midblast-2-14          midblast-2-15          midblast-2-16 
##               midblast               midblast               midblast 
##          midblast-2-17          midblast-2-18           midblast-2-2 
##               midblast               midblast               midblast 
##          midblast-2-23          midblast-2-24           midblast-2-3 
##               midblast               midblast               midblast 
##           midblast-2-4           midblast-2-5           midblast-2-6 
##               midblast               midblast               midblast 
##           midblast-2-7           midblast-2-8           midblast-2-9 
##               midblast               midblast               midblast 
##           midblast-3-1          midblast-3-10          midblast-3-11 
##               midblast               midblast               midblast 
##          midblast-3-12          midblast-3-13          midblast-3-14 
##               midblast               midblast               midblast 
##          midblast-3-15          midblast-3-17          midblast-3-18 
##               midblast               midblast               midblast 
##           midblast-3-2          midblast-3-23           midblast-3-3 
##               midblast               midblast               midblast 
##           midblast-3-4           midblast-3-5           midblast-3-6 
##               midblast               midblast               midblast 
##           midblast-3-7           midblast-3-8           midblast-3-9 
##               midblast               midblast               midblast 
##               zygote-1               zygote-2               zygote-3 
##                 zygote                 zygote                 zygote 
##               zygote-4 16cell-2pooled-split6a 16cell-2pooled-split6b 
##                 zygote                 16cell                 16cell 
## 16cell-2pooled-split8a 16cell-2pooled-split8b         16cell-split1a 
##                 16cell                 16cell                 16cell 
##         16cell-split1b         16cell-split2a         16cell-split2b 
##                 16cell                 16cell                 16cell 
##   8cell-12-1-smartseq2   8cell-12-2-smartseq2   8cell-12-3-smartseq2 
##                  8cell                  8cell                  8cell 
##   8cell-12-4-smartseq2   8cell-13-1-smartseq2   8cell-14-1-smartseq2 
##                  8cell                  8cell                  8cell 
##   8cell-14-2-smartseq2   8cell-14-3-smartseq2   8cell-14-4-smartseq2 
##                  8cell                  8cell                  8cell 
##  8cell-2pooled-split5a  8cell-2pooled-split5b  8cell-2pooled-split7a 
##                  8cell                  8cell                  8cell 
##  8cell-2pooled-split7b  8cell-2pooled-split9a  8cell-2pooled-split9b 
##                  8cell                  8cell                  8cell 
##          8cell-split3a          8cell-split3b          8cell-split4a 
##                  8cell                  8cell                  8cell 
##          8cell-split4b      fibroblast-13-CxB      fibroblast-14-CxB 
##                  8cell             fibroblast             fibroblast 
##      fibroblast-15-CxB      fibroblast-16-CxB      fibroblast-17-BxC 
##             fibroblast             fibroblast             fibroblast 
##      fibroblast-19-BxC      fibroblast-20-BxC      fibroblast-21-BxC 
##             fibroblast             fibroblast             fibroblast 
##      fibroblast-22-BxC       fibroblast-9-CxB 
##             fibroblast             fibroblast 
## 13 Levels: 16cell 4cell 8cell BXC C57twocell early2cell ... zygote
ML11[["in.vivo"]] <- Idents(object = ML11)
ML11$orig.ident <- "ML11"

Loading data

ML1 <- read_rds(file='/home/moyra.lawrence/count/enumerate/Empty_vector3/seurat.obj.rds')
ML2 <- read_rds(file='/home/moyra.lawrence/count/enumerate/Dppa2/seurat.obj.rds')
ML3 <- read_rds(file='/home/moyra.lawrence/count/enumerate/Gata3/seurat.obj.rds')
ML4 <- read_rds(file='/home/moyra.lawrence/count/enumerate/MERVL_VP64/seurat.obj.rds')
ML5 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Empty_puro/seurat.obj.rds')
ML6 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Empty_hygro/seurat.obj.rds')
ML7 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Dppa2_Gata3/seurat.obj.rds')
ML8 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Dppa2_MERVL/seurat.obj.rds')
ML9 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Gata3_MERVL/seurat.obj.rds')
ML10 <- read_rds(file='/home/moyra.lawrence/count2/enumerate/Dppa2_Gata3_MERVL/seurat.obj.rds')
ML12 <- read_rds(file='/home/moyra.lawrence/count3/enumerate/EVEV/seurat.obj.rds')
ML13 <- read_rds(file='/home/moyra.lawrence/count3/enumerate/Dppa2_MERVL_TT/seurat.obj.rds')

ML1
## An object of class Seurat 
## 32294 features across 4406 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML2
## An object of class Seurat 
## 32294 features across 3977 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML3
## An object of class Seurat 
## 32294 features across 4835 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML4
## An object of class Seurat 
## 32294 features across 3501 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML5
## An object of class Seurat 
## 32294 features across 4921 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML6
## An object of class Seurat 
## 32294 features across 4179 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML7
## An object of class Seurat 
## 32294 features across 5105 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML8
## An object of class Seurat 
## 32294 features across 3117 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML9
## An object of class Seurat 
## 32294 features across 4678 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML10
## An object of class Seurat 
## 32294 features across 3210 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML12
## An object of class Seurat 
## 32294 features across 3438 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)
ML13
## An object of class Seurat 
## 32294 features across 3958 samples within 1 assay 
## Active assay: RNA (32294 features, 0 variable features)

Preparing to filter EVEV Seurat Object

ML12[["percent.mt"]] <- PercentageFeatureSet(ML12, pattern = "^mt-")
VlnPlot(ML12, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(ML12, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, log=TRUE)
FeatureScatter(ML12, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(ML12, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

Preparing to filter Dppa2_MERVL_TT Seurat Object

ML13[["percent.mt"]] <- PercentageFeatureSet(ML13, pattern = "^mt-")
VlnPlot(ML13, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(ML13, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, log=TRUE)
FeatureScatter(ML13, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(ML13, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

##Filtering

ML1[["percent.mt"]] <- PercentageFeatureSet(ML1, pattern = "^mt-")
ML2[["percent.mt"]] <- PercentageFeatureSet(ML2, pattern = "^mt-")
ML3[["percent.mt"]] <- PercentageFeatureSet(ML3, pattern = "^mt-")
ML4[["percent.mt"]] <- PercentageFeatureSet(ML4, pattern = "^mt-")
ML5[["percent.mt"]] <- PercentageFeatureSet(ML5, pattern = "^mt-")
ML6[["percent.mt"]] <- PercentageFeatureSet(ML6, pattern = "^mt-")
ML7[["percent.mt"]] <- PercentageFeatureSet(ML7, pattern = "^mt-")
ML8[["percent.mt"]] <- PercentageFeatureSet(ML8, pattern = "^mt-")
ML9[["percent.mt"]] <- PercentageFeatureSet(ML9, pattern = "^mt-")
ML10[["percent.mt"]] <- PercentageFeatureSet(ML10, pattern = "^mt-")

Filtering conditions for all datasets apart from embryo data and DMTT

3500 < nFeature_RNA
10,000 < nCount_RNA <100,000
0 =< percent.mt < 10

Filtering conditions for DMTT

2000 < nFeature_RNA
10,000 < nCount_RNA <100,000
0 =< percent.mt < 12

Filtering datasets

ML1.filtered <- subset(ML1, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML2.filtered <- subset(ML2, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML3.filtered <- subset(ML3, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML4.filtered <- subset(ML4, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML5.filtered <- subset(ML5, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML6.filtered <- subset(ML6, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML7.filtered <- subset(ML7, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML8.filtered <- subset(ML8, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML9.filtered <- subset(ML9, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML10.filtered <- subset(ML10, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML12.filtered <- subset(ML12, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML13.filtered <- subset(ML13, subset = nFeature_RNA > 2000 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 12)

Confirming filtering EVEV

VlnPlot(ML12.filtered, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(ML12.filtered, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, log=TRUE)
FeatureScatter(ML12.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(ML12.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

Confirming filtering DMTT

VlnPlot(ML13.filtered, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(ML13.filtered, pt.size=0, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, log=TRUE)
FeatureScatter(ML13.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(ML13.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

Merging data

merge.filtered <- merge(ML1.filtered, y = c(ML2.filtered, ML3.filtered, ML4.filtered, ML5.filtered, ML6.filtered, ML7.filtered, ML8.filtered, ML9.filtered, ML10.filtered, ML11, ML12.filtered, ML13.filtered), add.cell.ids = c("Empty_hygro_1", "Dppa2", "Gata3", "MERVL", "Empty_puro", "Empty_hygro_2", "Dppa2_Gata3", "Dppa2_MERVL", "Gata3_MERVL", "Dppa2_Gata3_MERVL", "Embryo", "Empty_puro_Empty_hygro", "Dppa2_MERVL_TT"), project = "ML1")
merge.filtered
## An object of class Seurat 
## 36332 features across 39324 samples within 1 assay 
## Active assay: RNA (36332 features, 0 variable features)

Checking merge

head(colnames(merge.filtered))
## [1] "Empty_hygro_1_AAACCCAAGGAAGTAG.1" "Empty_hygro_1_AAACCCAGTCGCACGT.1"
## [3] "Empty_hygro_1_AAACCCAGTGTGCTTA.1" "Empty_hygro_1_AAACCCATCTGGTCAA.1"
## [5] "Empty_hygro_1_AAACGAAAGTAACCGG.1" "Empty_hygro_1_AAACGAAGTCCTGGTG.1"
tail(colnames(merge.filtered))
## [1] "Dppa2_MERVL_TT_TTTGGAGCATCCCACT.1" "Dppa2_MERVL_TT_TTTGGAGGTCTGTCAA.1"
## [3] "Dppa2_MERVL_TT_TTTGGTTCACGTAGTT.1" "Dppa2_MERVL_TT_TTTGGTTGTTTGTTCT.1"
## [5] "Dppa2_MERVL_TT_TTTGTTGCAGCTGCCA.1" "Dppa2_MERVL_TT_TTTGTTGTCACTCTTA.1"
unique(sapply(X = strsplit(colnames(merge.filtered), split = "_"), FUN = "[", 1))
## [1] "Empty"  "Dppa2"  "Gata3"  "MERVL"  "Embryo"
table(merge.filtered$orig.ident)
## 
##             Dppa2       Dppa2_Gata3 Dppa2_Gata3_MERVL       Dppa2_MERVL 
##              3153              4315              2665              2090 
##    Dppa2_MERVL_TT       Empty_hygro        Empty_puro     Empty_vector3 
##              2072              3565              4144              3664 
##              EVEV             Gata3       Gata3_MERVL        MERVL_VP64 
##              2448              3956              3888              3047 
##              ML11 
##               317
table(merge.filtered$in.vivo)
## 
##     16cell      4cell      8cell        BXC C57twocell early2cell earlyblast 
##         58         14         47         13          8          8         43 
## fibroblast  late2cell  lateblast   mid2cell   midblast     zygote 
##         10         10         30         12         60          4

Confirming filtering

vp1 <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA")) +  theme(legend.position = 'none')
vp2 <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA")) +  theme(legend.position = 'none')
vp3 <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt")) + theme(legend.position = 'none')
vp1.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA"),log=TRUE)+  theme(legend.position = 'none')
vp2.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA"),log=TRUE)+  theme(legend.position = 'none')
vp3.log <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt"),log=TRUE) +  theme(legend.position = 'none')
grid.arrange(vp1,vp2,vp3, nrow=1)
grid.arrange(vp1.log,vp2.log,vp3.log, nrow=1)
FeatureScatter(merge.filtered, "nCount_RNA", "percent.mt", pt.size=0.5) + scale_y_continuous(limits=c(0,100)) + scale_x_continuous(limits=c(0,100000))
FeatureScatter(merge.filtered, "nCount_RNA", "nFeature_RNA", pt.size=0.5) + scale_y_continuous(limits=c(0,12000)) + scale_x_continuous(limits=c(0,100000))

Splitting dataset

merge.list <- SplitObject(merge.filtered, split.by = "orig.ident")
merge.list <- lapply(X = merge.list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, verbose = FALSE)
})

features <- SelectIntegrationFeatures(object.list = merge.list)
merge.list <- lapply(X = merge.list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
})

Performing integration with embryo and WT ESCs as reference

anchors <- FindIntegrationAnchors(object.list = merge.list, reference = c(1, 5, 6, 11, 12), reduction = "rpca",
    dims = 1:50 )
totipotency <- IntegrateData(anchorset = anchors, dims = 1:50)

#Save object

DefaultAssay(totipotency) <- "integrated"
save(totipotency, file="Run3.rda")
#load(file='Run3.rda')

Run the standard workflow for visualisation and clustering

Identify variable genes 1

totipotency <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(totipotency), 10)
plot1 <- VariableFeaturePlot(totipotency)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2

### Scaling

all.genes <- rownames(totipotency)
totipotency <- ScaleData(totipotency, features = all.genes)

#Omitted Normalization as this is an integrated object

PCA combined dataset

totipotency <- RunPCA(totipotency, npcs=100, features = rownames(totipotency))

print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")
DimPlot(totipotency, reduction = "pca")
DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(totipotency, ndims=100)

UMAP resolution = 0.6 for totipotency

dim(totipotency)
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
totipotency <- FindNeighbors(totipotency, dims = 1:50)
totipotency <- FindClusters(totipotency, resolution = 0.4)
totipotency <- RunUMAP(totipotency, dims = 1:50)

Original identity UMAP

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
p1

Clusters on UMAP

p2 <- DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
p2

Quality control

DefaultAssay(totipotency) <- "RNA"
totipotency <- NormalizeData(totipotency)
totipotency <- FindVariableFeatures(totipotency, selection.method = "vst")

Defining cell cycle genes

g2m_1.genes <-  c("Hmgb2","Cdk1","Nusap1","Ube2c","Birc5","Tpx2","Top2a","Ndc80","Cks2","Nuf2","Cks1b", "Mki67",   "Tmpo","Cenpf","Tacc3","Fam64a","Smc4","Ccnb2","Ckap2l","Ckap2","Aurkb","Bub1","Kif11","Anp32e","Tubb4b","Gtse1","Kif20b","Hjurp","Cdca3","Hn1","Cdc20","Ttk","Cdc25c","Kif2c","Rangap1","Ncapd2","Dlgap5","Cdca2","Cdca8","Ect2","Kif23","Hmmr","Aurka","Psrc1","Anln","Lbr","Ckap5","Cenpe","Ctcf","Nek2","G2e3","Gas2l3","Cbx5","Cenpa") 

s_1.genes <-  c("Mcm5","Pcna","Tyms","Fen1","Mcm2","Mcm4","Rrm1","Ung","Gins2","Mcm6","Cdca7","Dtl","Prim1","Uhrf1","Mlf1ip","Hells","Rfc2","Rpa2","Nasp","Rad51ap1","Gmnn","Wdr76","Slbp","Ccne2","Ubr7","Pold3","Msh2","Atad2","Rad51","Rrm2","Cdc45","Cdc6","Exo1","Tipin","Dscc1","Blm","Casp8ap2","Usp1","Clspn","Pola1","Chaf1b","Brip1","E2f8")   

Assigning cell cycle score

totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)

# view cell cycle scores and phase assignments
head(totipotency[[]])
##                                     orig.ident nCount_RNA nFeature_RNA
## Empty_hygro_1_AAACCCAAGGAAGTAG.1 Empty_vector3      20185         4192
## Empty_hygro_1_AAACCCAGTCGCACGT.1 Empty_vector3      26369         5163
## Empty_hygro_1_AAACCCAGTGTGCTTA.1 Empty_vector3      54120         7692
## Empty_hygro_1_AAACCCATCTGGTCAA.1 Empty_vector3      28287         5744
## Empty_hygro_1_AAACGAAAGTAACCGG.1 Empty_vector3      30029         5261
## Empty_hygro_1_AAACGAAGTCCTGGTG.1 Empty_vector3      19055         4861
##                                  percent.mt in.vivo integrated_snn_res.0.4
## Empty_hygro_1_AAACCCAAGGAAGTAG.1   4.230865    <NA>                      1
## Empty_hygro_1_AAACCCAGTCGCACGT.1   3.985741    <NA>                      1
## Empty_hygro_1_AAACCCAGTGTGCTTA.1   4.477088    <NA>                      0
## Empty_hygro_1_AAACCCATCTGGTCAA.1   4.008909    <NA>                      0
## Empty_hygro_1_AAACGAAAGTAACCGG.1   4.878617    <NA>                      1
## Empty_hygro_1_AAACGAAGTCCTGGTG.1   4.733666    <NA>                      2
##                                  seurat_clusters     S.Score    G2M.Score Phase
## Empty_hygro_1_AAACCCAAGGAAGTAG.1               1 -0.05283798 -0.310921404    G1
## Empty_hygro_1_AAACCCAGTCGCACGT.1               1  0.14582351 -0.317864665     S
## Empty_hygro_1_AAACCCAGTGTGCTTA.1               0  0.09708147 -0.212689429     S
## Empty_hygro_1_AAACCCATCTGGTCAA.1               0 -0.02977374 -0.098052381    G1
## Empty_hygro_1_AAACGAAAGTAACCGG.1               1 -0.18860703 -0.022766625    G1
## Empty_hygro_1_AAACGAAGTCCTGGTG.1               2 -0.16530321 -0.001745498    G1
##                                  old.ident
## Empty_hygro_1_AAACCCAAGGAAGTAG.1         1
## Empty_hygro_1_AAACCCAGTCGCACGT.1         1
## Empty_hygro_1_AAACCCAGTGTGCTTA.1         0
## Empty_hygro_1_AAACCCATCTGGTCAA.1         0
## Empty_hygro_1_AAACGAAAGTAACCGG.1         1
## Empty_hygro_1_AAACGAAGTCCTGGTG.1         2

Plotting cell cycle score on UMAP

DimPlot(totipotency, reduction = "umap")

Scaling out cell cycle

DefaultAssay(totipotency) <- "integrated"
totipotency <- ScaleData(totipotency, vars.to.regress=c("S.Score","G2M.Score"))
## Regressing out S.Score, G2M.Score
## Centering and scaling data matrix
save(totipotency, file="Run3_cell_cycle_corrected3.rda")
#load(file='Run3_cell_cycle_corrected3.rda')

Rerun PCA, UMAP and clustering

totipotency <- RunPCA(totipotency, npcs = 100, verbose = FALSE)
totipotency <- RunUMAP(totipotency, reduction = "pca", dims = 1:30)
## 16:07:16 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:07:16 Read 39324 rows and found 30 numeric columns
## 16:07:16 Using Annoy for neighbor search, n_neighbors = 30
## 16:07:16 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:07:23 Writing NN index file to temp file /tmp/RtmpqsbJ4K/fileab3a26e0274b
## 16:07:23 Searching Annoy index using 1 thread, search_k = 3000
## 16:07:37 Annoy recall = 100%
## 16:07:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:07:40 Initializing from normalized Laplacian + noise (using irlba)
## 16:07:41 Commencing optimization for 200 epochs, with 1756584 positive edges
## 16:08:05 Optimization finished
totipotency <- FindNeighbors(totipotency, dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
totipotency <- FindClusters(totipotency, resolution = 0.18)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 39324
## Number of edges: 1100254
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9022
## Number of communities: 12
## Elapsed time: 7 seconds
## 2 singletons identified. 10 final clusters.
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
p1

Integrated analysis

Identify variable genes 2

tot <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10_2 <- head(VariableFeatures(tot), 10)
plot1 <- VariableFeaturePlot(tot)
plot2 <- LabelPoints(plot = plot1, points = top10_2, repel = TRUE, xnudge=0, ynudge=0)
plot2

Plot PCA2

print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
## PC_ 1 
## Positive:  Exoc4, Ranbp17, Adam23, Dst, Trip12 
## Negative:  Tubb4b, Cdc20, Hspa8, Ube2c, Actb 
## PC_ 2 
## Positive:  L1td1, Klf2, Ldha, Zfp42, Enox1 
## Negative:  Trp53inp1, Phlda3, Btg2, Ptpn14, Sdc4 
## PC_ 3 
## Positive:  Phlda3, Ptpn14, Krt18, Perp, Cdkn1a 
## Negative:  Platr31, Gm41409, Gm45659, Gm28940, Gm35974 
## PC_ 4 
## Positive:  Sfmbt2, Col4a1, Col4a2, Hspa5, Calr 
## Negative:  Ano3, Hist1h2ae, Ahnak2, D630023F18Rik, Trp53inp1 
## PC_ 5 
## Positive:  Hist1h2ae, Hist1h1b, Hist1h1e, Hist1h2ap, Hist1h4d 
## Negative:  Tcf15, Nanog, Kdm5b, Nr5a2, Epha4 
## PC_ 6 
## Positive:  Ptch1, Map4k4, Utrn, Nrp2, Tfap2c 
## Negative:  Gabarapl2, Gm2022, Gm5662, Gm2035, Gm5039
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")

DimPlot(totipotency, reduction = "pca")

DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(totipotency, ndims=100)

Quantifying cells in new clusters

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Libraries per cluster")
Libraries per cluster
Dppa2 Dppa2_Gata3 Dppa2_Gata3_MERVL Dppa2_MERVL Dppa2_MERVL_TT Empty_hygro Empty_puro Empty_vector3 EVEV Gata3 Gata3_MERVL MERVL_VP64 ML11
0 2287 (72.5%) 2775 (64.3%) 1747 (65.6%) 1317 (63%) 254 (12.3%) 2432 (68.2%) 2904 (70.1%) 2632 (71.8%) 1507 (61.6%) 1539 (38.9%) 2349 (60.4%) 1998 (65.6%) 15 (4.7%)
1 390 (12.4%) 702 (16.3%) 534 (20%) 369 (17.7%) 112 (5.4%) 680 (19.1%) 662 (16%) 503 (13.7%) 539 (22%) 412 (10.4%) 603 (15.5%) 480 (15.8%) 3 (0.9%)
2 360 (11.4%) 485 (11.2%) 265 (9.9%) 220 (10.5%) 66 (3.2%) 287 (8.1%) 383 (9.2%) 381 (10.4%) 196 (8%) 345 (8.7%) 498 (12.8%) 318 (10.4%) 0 (0%)
3 47 (1.5%) 230 (5.3%) 43 (1.6%) 38 (1.8%) 47 (2.3%) 121 (3.4%) 135 (3.3%) 88 (2.4%) 129 (5.3%) 1581 (40%) 356 (9.2%) 134 (4.4%) 127 (40.1%)
4 25 (0.8%) 48 (1.1%) 31 (1.2%) 84 (4%) 1555 (75%) 1 (0%) 0 (0%) 1 (0%) 4 (0.2%) 39 (1%) 39 (1%) 13 (0.4%) 21 (6.6%)
5 30 (1%) 25 (0.6%) 27 (1%) 12 (0.6%) 13 (0.6%) 15 (0.4%) 27 (0.7%) 43 (1.2%) 4 (0.2%) 38 (1%) 18 (0.5%) 76 (2.5%) 0 (0%)
6 14 (0.4%) 47 (1.1%) 16 (0.6%) 23 (1.1%) 8 (0.4%) 18 (0.5%) 16 (0.4%) 12 (0.3%) 8 (0.3%) 0 (0%) 19 (0.5%) 26 (0.9%) 0 (0%)
7 0 (0%) 2 (0%) 2 (0.1%) 27 (1.3%) 5 (0.2%) 11 (0.3%) 17 (0.4%) 3 (0.1%) 61 (2.5%) 2 (0.1%) 3 (0.1%) 0 (0%) 2 (0.6%)
8 0 (0%) 1 (0%) 0 (0%) 0 (0%) 12 (0.6%) 0 (0%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 3 (0.1%) 2 (0.1%) 116 (36.6%)
9 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 33 (10.4%)

Quantifying embryo cells in new clusters

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Embryo cells per cluster")
Embryo cells per cluster
16cell 4cell 8cell BXC C57twocell early2cell earlyblast fibroblast late2cell lateblast mid2cell midblast zygote
0 4 (6.9%) 0 (0%) 1 (2.1%) 1 (7.7%) 0 (0%) 0 (0%) 6 (14%) 0 (0%) 0 (0%) 2 (6.7%) 0 (0%) 1 (1.7%) 0 (0%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (30%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
2 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
3 0 (0%) 0 (0%) 1 (2.1%) 0 (0%) 0 (0%) 0 (0%) 33 (76.7%) 7 (70%) 0 (0%) 28 (93.3%) 0 (0%) 58 (96.7%) 0 (0%)
4 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 9 (90%) 0 (0%) 12 (100%) 0 (0%) 0 (0%)
5 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
6 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
7 2 (3.4%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
8 51 (87.9%) 14 (100%) 45 (95.7%) 0 (0%) 0 (0%) 0 (0%) 4 (9.3%) 0 (0%) 1 (10%) 0 (0%) 0 (0%) 1 (1.7%) 0 (0%)
9 1 (1.7%) 0 (0%) 0 (0%) 12 (92.3%) 8 (100%) 8 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4 (100%)

Totipotency, pluripotency and PE markers in each cluster

Find markers for every new cluster 2

DefaultAssay(totipotency) <- "RNA"
mus.markers <- FindAllMarkers(totipotency, only.pos = TRUE, min.pct =0.1, logfc.threshold = 0.25)
write.xlsx(mus.markers, file = 'Run3_markers.xlsx')
mus.markers %>% group_by(cluster) %>% top_n(n = 4, wt = avg_log2FC)

RNA slot of new object

totipotency <- ScaleData(totipotency, features = all.genes)
VlnPlot(totipotency, features = features_vec, pt.size=0)

#Heatmap

top10 <- mus.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(totipotency, features = top10$gene)
## Warning in DoHeatmap(totipotency, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the RNA assay:
## Trf, Zbed3, Ahsg, Apoa2, Apoa1, Serpina1b, Gc, LOC100502936, Serpina3k, Alb,
## Isyna1, Pdxk, Gpd1l, Gnb2l1, Gm11517, Tceb1, Alppl2, Timd2, Eif2s2, Rpl29, Sub1,
## Prdx1, Eno1, Snhg12, Gas5, Atp6v0b, 1110038B12Rik, Lamb1, Ung, Usp37, Cdc6,
## Rrm2, Slbp, Dtl

Original identity on new UMAP

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)

p1

Cell types from in vivo data

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "in.vivo", pt.size = 1)
p1

Final classification

new.cluster.ids <- c("Pluripotent", "Pluripotent mitosis", "Pluripotent apoptosis", "Primitive Endoderm", "Totipotent","Pluripotent ubiquitin", "Pluripotent", "Pluripotent splicing translation", "Trophectoderm", "Zygote, early 2cell, 16 cell, liver")
names(new.cluster.ids) <- levels(totipotency)
totipotency <- RenameIdents(totipotency, new.cluster.ids)
DimPlot(totipotency, reduction = "umap", label = TRUE, pt.size = 0.5, repel = TRUE) + NoLegend()

Libraries per cluster

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
Cluster_distribution
Dppa2 Dppa2_Gata3 Dppa2_Gata3_MERVL Dppa2_MERVL Dppa2_MERVL_TT Empty_hygro Empty_puro Empty_vector3 EVEV Gata3 Gata3_MERVL MERVL_VP64 ML11
Pluripotent 2301 (73%) 2822 (65.4%) 1763 (66.2%) 1340 (64.1%) 262 (12.6%) 2450 (68.7%) 2920 (70.5%) 2644 (72.2%) 1515 (61.9%) 1539 (38.9%) 2368 (60.9%) 2024 (66.4%) 15 (4.7%)
Pluripotent mitosis 390 (12.4%) 702 (16.3%) 534 (20%) 369 (17.7%) 112 (5.4%) 680 (19.1%) 662 (16%) 503 (13.7%) 539 (22%) 412 (10.4%) 603 (15.5%) 480 (15.8%) 3 (0.9%)
Pluripotent apoptosis 360 (11.4%) 485 (11.2%) 265 (9.9%) 220 (10.5%) 66 (3.2%) 287 (8.1%) 383 (9.2%) 381 (10.4%) 196 (8%) 345 (8.7%) 498 (12.8%) 318 (10.4%) 0 (0%)
Primitive Endoderm 47 (1.5%) 230 (5.3%) 43 (1.6%) 38 (1.8%) 47 (2.3%) 121 (3.4%) 135 (3.3%) 88 (2.4%) 129 (5.3%) 1581 (40%) 356 (9.2%) 134 (4.4%) 127 (40.1%)
Totipotent 25 (0.8%) 48 (1.1%) 31 (1.2%) 84 (4%) 1555 (75%) 1 (0%) 0 (0%) 1 (0%) 4 (0.2%) 39 (1%) 39 (1%) 13 (0.4%) 21 (6.6%)
Pluripotent ubiquitin 30 (1%) 25 (0.6%) 27 (1%) 12 (0.6%) 13 (0.6%) 15 (0.4%) 27 (0.7%) 43 (1.2%) 4 (0.2%) 38 (1%) 18 (0.5%) 76 (2.5%) 0 (0%)
Pluripotent splicing translation 0 (0%) 2 (0%) 2 (0.1%) 27 (1.3%) 5 (0.2%) 11 (0.3%) 17 (0.4%) 3 (0.1%) 61 (2.5%) 2 (0.1%) 3 (0.1%) 0 (0%) 2 (0.6%)
Trophectoderm 0 (0%) 1 (0%) 0 (0%) 0 (0%) 12 (0.6%) 0 (0%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 3 (0.1%) 2 (0.1%) 116 (36.6%)
Zygote, early 2cell, 16 cell, liver 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 33 (10.4%)

Embryo cells per cluster

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
Cluster_distribution
16cell 4cell 8cell BXC C57twocell early2cell earlyblast fibroblast late2cell lateblast mid2cell midblast zygote
Pluripotent 4 (6.9%) 0 (0%) 1 (2.1%) 1 (7.7%) 0 (0%) 0 (0%) 6 (14%) 0 (0%) 0 (0%) 2 (6.7%) 0 (0%) 1 (1.7%) 0 (0%)
Pluripotent mitosis 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (30%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Pluripotent apoptosis 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Primitive Endoderm 0 (0%) 0 (0%) 1 (2.1%) 0 (0%) 0 (0%) 0 (0%) 33 (76.7%) 7 (70%) 0 (0%) 28 (93.3%) 0 (0%) 58 (96.7%) 0 (0%)
Totipotent 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 9 (90%) 0 (0%) 12 (100%) 0 (0%) 0 (0%)
Pluripotent ubiquitin 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Pluripotent splicing translation 2 (3.4%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Trophectoderm 51 (87.9%) 14 (100%) 45 (95.7%) 0 (0%) 0 (0%) 0 (0%) 4 (9.3%) 0 (0%) 1 (10%) 0 (0%) 0 (0%) 1 (1.7%) 0 (0%)
Zygote, early 2cell, 16 cell, liver 1 (1.7%) 0 (0%) 0 (0%) 12 (92.3%) 8 (100%) 8 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4 (100%)

Split plot per library

DimPlot(totipotency, reduction = "umap", split.by = "orig.ident", ncol = 3, pt.size = 2)

Save data as Run3_final3

save(totipotency, file="Run3_final.rda")
#load("Run3_final.rda")

Plotting cell cycle on new UMAP

DefaultAssay(totipotency) <- "RNA"
totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)
DimPlot(totipotency, reduction = "umap")

#Cell cycle bar chart

totipotency@meta.data %>% ggplot(aes(x=old.ident,fill=Phase)) +
  geom_bar(position = "fill",size = 3, width = .8) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 11)) +
  labs(x = "", y = "Cell fraction")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

#Feature plot

FeaturePlot(totipotency, features = "MERVL")

#Plotting DMTT

#Idents(object = totipotency) <- "orig.ident"
#highlight <- WhichCells(totipotency, ident = "Dppa2_MERVL_TT")
#DimPlot(totipotency, reduction = "umap", cells.highlight = highlight, pt.size = 1, cols.highlight = c("darkgreen"))

#Subsetting on totipotency

Idents(object = totipotency) <- "seurat_clusters"
tot <- subset(x = totipotency, idents = "4")
DimPlot(tot, reduction = "umap")

save(tot, file="Run3_final_tot_cluster.rda")

Run the standard workflow for visualisation and clustering

Identify variable genes in totipotent cluster

tot <- FindVariableFeatures(tot, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(tot), 10)
plot1 <- VariableFeaturePlot(tot)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2

Scaling

all.genes <- rownames(tot)
tot <- ScaleData(tot, features = all.genes)

PCA

tot <- RunPCA(tot, npcs=100, features = rownames(tot))

print(tot[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(tot, dims = 1:6, reduction = "pca")
DimPlot(tot, reduction = "pca")
DimHeatmap(tot, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(tot, ndims=100)

UMAP resolution = 0.6 for totipotency

dim(tot)
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
tot <- FindNeighbors(tot, dims = 1:50)
tot <- FindClusters(tot, resolution = 0.2)
tot <- RunUMAP(tot, dims = 1:50)
DimPlot(tot, reduction = "umap", label = TRUE, repel = TRUE)

Original identity UMAP

p1 <- DimPlot(tot, reduction = "umap", group.by = "orig.ident")
p1

## Normalise and find variable features

DefaultAssay(tot) <- "RNA"
tot <- NormalizeData(tot)
tot <- FindVariableFeatures(tot, selection.method = "vst")

Quantify cells in new clusters

tbl_cell <- table(tot@active.ident, tot@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add 
##      Dppa2 Dppa2_Gata3 Dppa2_Gata3_MERVL Dppa2_MERVL Dppa2_MERVL_TT Empty_hygro
## 1   0 (0%)    1 (2.1%)            0 (0%)    1 (1.2%)      809 (52%)      0 (0%)
## 2   1 (4%)    3 (6.2%)            0 (0%)    1 (1.2%)      746 (48%)      0 (0%)
## 3 24 (96%)  44 (91.7%)         31 (100%)  82 (97.6%)         0 (0%)    1 (100%)
## 4   0 (0%)      0 (0%)            0 (0%)      0 (0%)         0 (0%)      0 (0%)
##   Empty_vector3     EVEV     Gata3 Gata3_MERVL MERVL_VP64      ML11 rownames
## 1        0 (0%)   0 (0%)    0 (0%)      0 (0%)     0 (0%)    0 (0%)        0
## 2        0 (0%)   0 (0%)    0 (0%)      0 (0%)     0 (0%)    0 (0%)        1
## 3      1 (100%) 4 (100%) 39 (100%)   39 (100%)  13 (100%)    0 (0%)        2
## 4        0 (0%)   0 (0%)    0 (0%)      0 (0%)     0 (0%) 21 (100%)        3

Scale data

Find markers for every new cluster

DefaultAssay(tot) <- "RNA"
tot.markers <- FindAllMarkers(tot, only.pos = TRUE, min.pct =0.1, logfc.threshold = 0.25)
write.xlsx(tot.markers, file = 'Tot_markers.xlsx')
tot.markers %>% group_by(cluster) %>% top_n(n = 4, wt = avg_log2FC)

#Heatmap

top10 <- tot.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(tot, features = top10$gene) + NoLegend() + theme(axis.text.y = element_text(size = 4.5))

Original identity on new UMAP

p1 <- DimPlot(tot, reduction = "umap", group.by = "orig.ident")
DimPlot(tot, reduction = "umap", label = TRUE, repel = TRUE)

p1

Cell types from in vivo data

p1 <- DimPlot(tot, reduction = "umap", group.by = "in.vivo")
p1

Save data as Tot_reclustered

save(tot, file="Tot_reclustered.rda")